* This do-file prepares the DADS data for the various merges with the inflows of newly registered unemployed or with the inflows of new claimants.
* It reduces the variable list and aggregates the yearly files from 2004 to 2012.

* INPUT: 
* dads_2003, dads_2004, dads_2005, ... dads_2012
* GEOFLA_COMMUNE (municipality centroids, area and population)

* OUTPUT: 
* dads_2004_2012_red
* dads_cov
* dads_2004_2012_idfhda
* deflator (price deflator used in DADS)

* Session setup: start from an empty memory and raise Stata's size limits
* before any data are loaded.
clear all
set matsize 1000
set maxvar 10000
set max_memory 11g

* Root folder of the project (machine-specific, ends with a path separator).
global path_project "C:\Users\Public\Documents\resW\export_dofile_soumission\"

* All reduced files saved without a path below are written to this folder.
cd ${path_project}data\

* Prefix prepended to the raw input file names (dads_YYYY, GEOFLA_COMMUNE).
* NOTE(review): the value is left blank in this file; presumably it has to be
* filled in with the folder holding the raw sources, including a trailing
* path separator -- confirm before running.
global SOURCES=

* Folder for exported results.
global OUTPUT="${path_project}output\"

* First and last year of the aggregated panel.
global last_year="2012"
global first_year="2004"

*******************************************************************************
* 

set more off

* -----------------------------------------------------------------------------
* Yearly reduction: for each raw file dads_YYYY (2003 to 2012)
*   1. keep private-sector records that are not generated by UI and that have
*      both a start and an end day of remuneration,
*   2. label the variables that are kept,
*   3. convert the start/end day counters into Stata daily dates,
*   4. keep the reduced variable list and save dads_YYYY_red in the current folder.
* -----------------------------------------------------------------------------
forvalues year = 2003/2012 {
	use "${SOURCES}dads_`year'", clear

	* restrict to the private sector
	keep if sect=="PRIV"

	* suppress lines generated by UI
	drop if ce=="A"

	* spells without a start or an end day cannot be dated
	drop if missing(debremu)
	drop if missing(finremu)

	* a38 seems the only stable industry var.

	label var idfhda "Individual identifier"
	label var an     "Year of employment"
	label var sir    "Firm identifier"
	label var a38    "Industry of the firm (36 cells)"
	label var dp     "Nb of days worked within firm X year"
	label var nbheur "Nb of hours worked within firm X year"
	label var sb     "Gross wage within firm X year"
	label var sbr    "Real gross wage within firm X year"
	label var filtre "Non-Annex job"
	label var contrat_travail " Type of labor contract"
	label var cs1    "Occupation type (1 digit)"
	label var cs2    "Occupation type (2 digits)"
	label var pcs_v2 "Occupation type (4 digits) missing"
	label var pcs4   "Occupation type (4 digits)"
	label var annai  "Birth year of worker"
	label var sx     "Gender"
	label var dip_tot "Diploma"

	label var entpan "First year in the DADS panel"
	label var entsir "Hiring year in the firm"

	* --- Start date of the spell ---------------------------------------------
	* The conversion treats debremu as a day counter with 30-day months:
	* month = floor((d-1)/30)+1, day = d - 30*floor((d-1)/30).
	* NOTE(review): presumably the DADS 360-day calendar -- confirm.
	foreach v in mondeb daydeb ddebE {
		capture drop `v'
	}
	gen mondeb = floor((debremu-1)/30) + 1
	tab mondeb
	gen daydeb = debremu - floor((debremu-1)/30)*30
	gen ddebE  = mdy(mondeb, daydeb, an)
	* mdy() returns missing for dates that do not exist (e.g. 30 February);
	* such start dates are only counted here and stay missing.
	count if missing(ddebE)
	* negative start days are set to 1 January of the year
	replace ddebE = mdy(1,1,an) if debremu<0
	format ddebE %td
	drop daydeb mondeb
	label var ddebE "Start date of employment spell"

	* --- End date of the spell (same conversion) -----------------------------
	foreach v in monfin dayfin dfinE {
		capture drop `v'
	}
	gen monfin = floor((finremu-1)/30) + 1
	gen dayfin = finremu - floor((finremu-1)/30)*30
	gen dfinE  = mdy(monfin, dayfin, an)
	* NOTE(review): every missing end date is set to 28 February, not only the
	* non-existent February days; out-of-range finremu values get the same
	* fallback -- confirm this is intended.
	replace dfinE = mdy(2,28,an) if missing(dfinE)
	format dfinE %td
	drop dayfin monfin
	label var dfinE "End date of employment spell (incl.)"

	* reduced variable list
	keep idfhda an sir a38 ddebE dfinE dp nbheur sb sbr filtre contrat_travail ///
		cs1 cs2 pcs_v2 pcs4 comt dept nbsa_ent comr depr annai sx dip_tot ce ///
		entpan entsir

	* harmonise occupation codes to upper case
	replace pcs4 = upper(pcs4)

	compress
	save dads_`year'_red, replace
}

* Stack the reduced yearly files 2004 to 2012 into a single panel file.
* The 2003 reduced file is not part of the stacked panel.
use dads_2004_red, clear
foreach year of numlist 2005/2012 {
	append using dads_`year'_red
}
compress
save dads_2004_2012_red, replace


* -----------------------------------------------------------------------------
* Wage variables on the stacked panel.
* NOTE(review): the file name combines the first_year global with a hard-coded
* 2012, while other steps of this do-file read dads_2004_2012_red literally;
* changing first_year alone would break them.
* -----------------------------------------------------------------------------
use dads_${first_year}_2012_red, clear

* Monthly wage: days-based for ce C (gross wage per day worked times 30.5),
* hours-based for ce P (hourly wage times 35 times 4.5). Other ce values stay missing.
* NOTE(review): C / P presumably stand for full-time / part-time, and 35*4.5
* for 35 weekly hours over 4.5 weeks -- confirm.
gen wage = sb/dp*30.5 if ce=="C"
replace wage = (sb/nbheur)*35*4.5 if ce=="P"
label var wage "FTE monthly wage (DADS)"
sum wage, d
* clamp non-missing values to the interval 1111 to 8921
replace wage = 8921 if wage>8921 & !missing(wage)
replace wage = 1111 if wage<1111 & !missing(wage)

gen wage_hourly = sb/nbheur
label var wage_hourly "Hourly wage (DADS)"
sum wage_hourly, d
* clamp non-missing values to the interval 6.8 to 53
* (variable name written in full so that it does not depend on abbreviation)
replace wage_hourly = 53  if wage_hourly>53  & !missing(wage_hourly)
replace wage_hourly = 6.8 if wage_hourly<6.8 & !missing(wage_hourly)
* lowest minimum wage over the period considered

* firm X year averages of the clamped wages
bysort sir an: egen mean_wage_fte = mean(wage)
label var mean_wage_fte "Average of FTE monthly wage in the firm X year (DADS)"
bysort sir an: egen mean_wage_hourly = mean(wage_hourly)
label var mean_wage_hourly "Average of hourly wage in the firm X year (DADS)"
sum mean_wage_fte, d
sum mean_wage_hourly, d

* occupation (4-digit) X year averages of the clamped wages
bysort pcs4 an: egen pcs_wage_fte = mean(wage)
label var pcs_wage_fte "Average of FTE monthly wage in the occupation (4-digit) X year (DADS)"
bysort pcs4 an: egen pcs_wage_hourly = mean(wage_hourly)
label var pcs_wage_hourly "Average of hourly wage in the occupation (4-digit) X year (DADS)"

* Workplace coordinates: build the municipality code by concatenating dept and
* comt, then attach the centroid of that municipality from GEOFLA_COMMUNE.
gen codegeot = dept + comt
label var codegeot "Municipality code of workplace"
gen INSEE_COM = codegeot
* municipalities present only in GEOFLA_COMMUNE are not kept
merge m:1 INSEE_COM using ${SOURCES}GEOFLA_COMMUNE, ///
	keepusing(X_CENTROID Y_CENTROID) keep(master match) nogenerate
* the few unmatched are either for missing municipalities or for oversea territories
* drop workplaces whose dept code starts with 97, 98 or 99
drop if inrange(substr(dept,1,2),"97","99")
drop INSEE_COM
rename (X_CENTROID Y_CENTROID) (xt yt)
label var xt "X-geographical coordinate of workplace (centroid of firm's municipality)"
label var yt "Y-geographical coordinate of workplace (centroid of firm's municipality)"


* Residence municipality: build the code by concatenating depr and comr, then
* attach centroid, area and population from GEOFLA_COMMUNE in a single merge
* (same key and same using file for all four variables).
gen codegeor = depr + comr
label var codegeor "Municipality code of residence"
gen INSEE_COM = codegeor
merge m:1 INSEE_COM using ${SOURCES}GEOFLA_COMMUNE, ///
	keepusing(X_CENTROID Y_CENTROID SUPERFICIE POPULATION)
* drop residences whose depr code starts with 97, 98 or 99
drop if inrange(substr(depr,1,2),"97","99")
* diagnostic: departement of residence for the records left unmatched
tab depr if _merge==1
* municipalities present only in GEOFLA_COMMUNE are not kept
drop if _merge==2
drop _merge INSEE_COM

rename (X_CENTROID Y_CENTROID SUPERFICIE POPULATION) (xr yr superficie population)
label var xr "X-geo coord of residence (centroid of municip)"
label var yr "Y-geo coord of residence (centroid of municip)"
label var superficie "Area in hectares of residence municipality"
label var population "From commune_superficie file"

* Final layout of the panel: fix the column order, sort by worker, firm and
* spell dates, and overwrite the stacked file with the enriched version.
order idfhda an sir ddebE dfinE                      /// identifiers and spell dates
	sb sbr wage dp nbheur wage_hourly                /// pay, days and hours
	ce cs1 cs2 pcs4 pcs_v2 contrat_travail filtre    /// job characteristics
	annai sx dip_tot                                 /// worker characteristics
	depr comr codegeor xr yr superficie              /// residence
	entpan entsir                                    /// entry years
	dept comt codegeot xt yt                         /// workplace
	a38 nbsa_ent mean_wage_fte mean_wage_hourly      /// firm
	pcs_wage_fte pcs_wage_hourly population

sort idfhda sir ddebE dfinE

compress
* NOTE(review): the name combines the first_year global with a hard-coded 2012
save dads_${first_year}_2012_red, replace


 
*******************************************************************************

* Worker-level covariates: one row per idfhda with birth year, gender and diploma.
* Rows are ordered by worker and by descending spell start date; collapse (firstnm)
* then keeps, for each worker, the first non-missing value of each variable.
* NOTE(review): this presumably picks the value from the most recent spell, which
* relies on collapse honouring the current within-worker order; spells sharing
* the same start date are ordered arbitrarily -- confirm.
use dads_2004_2012_red, clear
gsort idfhda -ddebE
keep idfhda annai sx dip_tot
collapse (firstnm) annai sx dip_tot, by(idfhda)
save dads_cov, replace


*******************************************************************************

* List of distinct worker identifiers present in the panel, sorted by idfhda.
use idfhda using dads_2004_2012_red, clear
bysort idfhda: keep if _n==1
save dads_2004_2012_idfhda, replace


*******************************************************************************
* export deflator used in DADS

* Recover the deflator as the ratio of real to nominal gross wage and keep the
* distinct (year, ratio) pairs.
* NOTE(review): if the ratio is not exactly identical across records of a given
* year, several rows per year remain in the saved file -- confirm.
use dads_2004_2012_red, clear
gen sbr_sb = sbr/sb
keep if !missing(sbr_sb)
keep an sbr_sb
sort an
duplicates drop
save deflator, replace

